1. Import Data

library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2   3.5.1      ✔ fma       2.5   
## ✔ forecast  8.23.0     ✔ expsmooth 2.3
## 
library(fpp)
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
## 
## Attaching package: 'fpp'
## The following objects are masked from 'package:fpp2':
## 
##     ausair, ausbeer, austa, austourists, debitcards, departures,
##     elecequip, euretail, guinearice, oil, sunspotarea, usmelec
library(readr)
library(readxl)
library(forecast)
library(ggplot2)

carsls = read.csv("https://raw.githubusercontent.com/su-min-oh/Final-Exam/main/TOTALSA.csv")

#make it a time series data
carsls_ts = ts(carsls$Sales.Units.in.Millions.,start=c(2019,1),frequency=12)

2. Plot and Inference

plot(carsls_ts)

carsls_sub = window(carsls_ts,start=2022)
plot(carsls_sub)

Observation Summary

The data is about the car sales from Jan 2019 to Feb 2024. The plot shows the drastic drop in 2020, and this is assumed to be due to outbreak of Covid. Since the graph has big changes, I cut the data from 2022 to do the further forecast. From the cut graph, I see there is a gradually increasing trend, but it needs further analysis to decide on that.

3. Central Tendancy

#Min, max, mean, median, 1st and 3rd Quartile values
summary(carsls_ts)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   8.944  14.189  15.938  15.612  16.966  18.697
summary(carsls_sub)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.29   14.19   15.47   15.18   15.95   16.45
#boxplot
boxplot(carsls_ts)

boxplot(carsls_sub)

Observation Summary (for the whole data)

  1. The maximum value is 18M, while minimum is only around 8M.
  2. Mean is almost as same as Median, meaning that the data is not skewed to one side.
  3. Considering that 3rd quartile is 16M and 1st quartile is 14M, IQR is 2M
  4. By looking at the boxplot, I could find out an outlier.

Observation Summary (for the subset data)

  1. The maximum value is 16M, while minimum is only around 13M.
  2. Like in the whole data, Mean is almost as same as Median, meaning that the data is not skewed to one side.
  3. Considering that 3rd quartile is 16M and 1st quartile is 14M, IQR is 2M. This is similar with the whole data.
  4. Boxplot showed different figure with the one made from the whole data. There were no outliers detected. So I’m confident to utilize this subset of data to forecast from now.

4. Decomposition

carsls_decomp = decompose(carsls_sub)
carsls_decomp
## $x
##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 2022 14.866 14.168 14.253 14.681 13.286 13.669 13.867 14.064 14.136 15.091
## 2023 15.665 15.434 15.668 16.446 15.948 16.428 16.302 15.909 16.193 15.799
## 2024 15.513 16.191                                                        
##         Nov    Dec
## 2022 14.733 13.935
## 2023 15.945 16.386
## 2024              
## 
## $seasonal
##              Jan         Feb         Mar         Apr         May         Jun
## 2022  0.37197569 -0.03735764  0.03405903  0.69685069  0.11885069  0.44622569
## 2023  0.37197569 -0.03735764  0.03405903  0.69685069  0.11885069  0.44622569
## 2024  0.37197569 -0.03735764                                                
##              Jul         Aug         Sep         Oct         Nov         Dec
## 2022 -0.12229514 -0.27592014 -0.39777431  0.42472569 -0.11773264 -1.14160764
## 2023 -0.12229514 -0.27592014 -0.39777431  0.42472569 -0.11773264 -1.14160764
## 2024                                                                        
## 
## $trend
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2022       NA       NA       NA       NA       NA       NA 14.26237 14.34842
## 2023 15.21937 15.39771 15.56029 15.67550 15.75550 15.90812 16.00392 16.02913
## 2024       NA       NA                                                      
##           Sep      Oct      Nov      Dec
## 2022 14.46012 14.59262 14.77708 15.00296
## 2023       NA       NA       NA       NA
## 2024                                    
## 
## $random
##               Jan          Feb          Mar          Apr          May
## 2022           NA           NA           NA           NA           NA
## 2023  0.073649306  0.073649306  0.073649306  0.073649306  0.073649306
## 2024           NA           NA                                       
##               Jun          Jul          Aug          Sep          Oct
## 2022           NA -0.273079861 -0.008496528  0.073649306  0.073649306
## 2023  0.073649306  0.420378472  0.155795139           NA           NA
## 2024                                                                 
##               Nov          Dec
## 2022  0.073649306  0.073649306
## 2023           NA           NA
## 2024                          
## 
## $figure
##  [1]  0.37197569 -0.03735764  0.03405903  0.69685069  0.11885069  0.44622569
##  [7] -0.12229514 -0.27592014 -0.39777431  0.42472569 -0.11773264 -1.14160764
## 
## $type
## [1] "additive"
## 
## attr(,"class")
## [1] "decomposed.ts"
plot(carsls_decomp)

# Plot for time series adjusted for seasonality
carsls_seasadj = seasadj(carsls_decomp)
plot(carsls_sub)
lines(carsls_seasadj,col="red")

5. Naive Method

#Output
carsls_naive = naive(carsls_sub)

#Residual Analysis
naive_res = residuals(carsls_naive)

#Plot of residuals
plot(naive_res)
abline(h=0, col="red")

#Histogram of Residuals
hist(naive_res)

#Plot of fitted vs. residuals
naive_fit = fitted(carsls_naive)
ggplot(data = data.frame(naive_fit, naive_res), aes(x = naive_fit, y = naive_res)) +
  geom_point(shape = 1) + geom_hline(yintercept = 0,color="red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

#Plot of actual vs. residuals
plot(as.numeric(carsls_sub),naive_res, xlab="actual",ylab="residuals")
abline(h=0,col="red")

#Acf
plot(Acf(naive_res))

#Measures of accuracy
accuracy(carsls_naive)
##                 ME      RMSE     MAE       MPE     MAPE      MASE       ACF1
## Training set 0.053 0.6498938 0.51412 0.2460432 3.411387 0.3230122 -0.4356982
#Forecast for next year (table, plot)
naive_fc = naive(carsls_sub,h=12)
naive_fc
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2024         16.191 15.35813 17.02387 14.91723 17.46477
## Apr 2024         16.191 15.01314 17.36886 14.38962 17.99238
## May 2024         16.191 14.74842 17.63358 13.98477 18.39723
## Jun 2024         16.191 14.52526 17.85674 13.64346 18.73854
## Jul 2024         16.191 14.32864 18.05336 13.34277 19.03923
## Aug 2024         16.191 14.15089 18.23111 13.07092 19.31108
## Sep 2024         16.191 13.98743 18.39457 12.82093 19.56107
## Oct 2024         16.191 13.83528 18.54672 12.58824 19.79376
## Nov 2024         16.191 13.69238 18.68962 12.36969 20.01231
## Dec 2024         16.191 13.55723 18.82477 12.16299 20.21901
## Jan 2025         16.191 13.42867 18.95333 11.96639 20.41561
## Feb 2025         16.191 13.30585 19.07615 11.77854 20.60346
plot(naive_fc)

Summary of this forecasting technique

  • How good is the accuracy? : For now, it’s hard to measure it’s accuracy since I haven’t done the accuracy test for other forecast models. Just to assume with the values of MAPE, the absolute percentage error is only about 3%, which is very low, so this method can be accurate. But still, it needs to be compared with other models later on.

  • Predicted value : It predicts the value for next 12months for 16.191, uniformly, as it predicts based on the data from the last period.

6. Simple Moving Averages

MA3 = ma(carsls_sub, order=3)
MA6 = ma(carsls_sub, order=6)
MA9 = ma(carsls_sub, order=9)

plot(carsls_sub)
lines(MA3,col="red")
lines(MA6,col="blue")
lines(MA9,col="green")

#Forecast for the next 12 months using the best one(bonus)
MA3_fc = forecast(MA3, h=12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
MA6_fc = forecast(MA6, h=12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
MA9_fc = forecast(MA9,h=12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
accuracy(MA3_fc)
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set 0.06672768 0.2195715 0.1743123 0.4275958 1.167909 0.1016876
##                   ACF1
## Training set 0.4996581
accuracy(MA6_fc)
##                       ME       RMSE        MAE        MPE      MAPE       MASE
## Training set 0.002340272 0.06812097 0.05171541 0.02845714 0.3385603 0.02762252
##                    ACF1
## Training set 0.08423552
accuracy(MA9_fc)
##                        ME       RMSE        MAE         MPE      MAPE      MASE
## Training set -0.003493074 0.09149204 0.07412256 -0.01203334 0.4849304 0.0405468
##                    ACF1
## Training set -0.0120887
#I chose MA6

plot(MA6_fc)

7. Simple Smoothing

#Simple smoothing for next 12 months
carsls_ets = ets(carsls_sub,model="ANN")
carsls_ets_fc = forecast(carsls_ets,h=12)
summary(carsls_ets_fc)
## 
## Forecast method: ETS(A,N,N)
## 
## Model Information:
## ETS(A,N,N) 
## 
## Call:
## ets(y = carsls_sub, model = "ANN")
## 
##   Smoothing parameters:
##     alpha = 0.5558 
## 
##   Initial states:
##     l = 14.5659 
## 
##   sigma:  0.5893
## 
##      AIC     AICc      BIC 
## 61.13388 62.22479 64.90817 
## 
## Error measures:
##                     ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.1008798 0.566215 0.4348502 0.5553676 2.889801 0.2732084
##                    ACF1
## Training set -0.1227497
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2024       16.02361 15.26835 16.77887 14.86854 17.17869
## Apr 2024       16.02361 15.15954 16.88769 14.70212 17.34510
## May 2024       16.02361 15.06297 16.98425 14.55444 17.49278
## Jun 2024       16.02361 14.97527 17.07196 14.42030 17.62692
## Jul 2024       16.02361 14.89435 17.15287 14.29655 17.75067
## Aug 2024       16.02361 14.81886 17.22837 14.18110 17.86612
## Sep 2024       16.02361 14.74782 17.29940 14.07246 17.97476
## Oct 2024       16.02361 14.68054 17.36668 13.96956 18.07766
## Nov 2024       16.02361 14.61647 17.43075 13.87158 18.17564
## Dec 2024       16.02361 14.55520 17.49203 13.77786 18.26936
## Jan 2025       16.02361 14.49638 17.55085 13.68791 18.35931
## Feb 2025       16.02361 14.43974 17.60748 13.60129 18.44593
#Residual Analysis
carsls_ets_res = residuals(carsls_ets_fc)

#Plot of residuals
plot(carsls_ets_res)
abline(h=0,col="red")

#Histogram
hist(carsls_ets_res)

#Plot of fitted vs. residuals
carsls_ets_fitted = fitted(carsls_ets_fc)
ggplot(data = data.frame(fitted = carsls_ets_fitted, residuals = carsls_ets_res), 
       aes(x = fitted, y = residuals)) + geom_point() + geom_hline(yintercept = 0, color="red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

#Plot of actual vs. residuals
plot(as.numeric(carsls_sub),carsls_ets_res, xlab="actual",ylab="residuals")
abline(h=0,col="red")

#Acf
plot(Acf(carsls_ets_res))

#Measures of accuracy
accuracy(carsls_ets_fc)
##                     ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.1008798 0.566215 0.4348502 0.5553676 2.889801 0.2732084
##                    ACF1
## Training set -0.1227497
#Forecast for next year (table, plot)
carsls_ets_fc
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2024       16.02361 15.26835 16.77887 14.86854 17.17869
## Apr 2024       16.02361 15.15954 16.88769 14.70212 17.34510
## May 2024       16.02361 15.06297 16.98425 14.55444 17.49278
## Jun 2024       16.02361 14.97527 17.07196 14.42030 17.62692
## Jul 2024       16.02361 14.89435 17.15287 14.29655 17.75067
## Aug 2024       16.02361 14.81886 17.22837 14.18110 17.86612
## Sep 2024       16.02361 14.74782 17.29940 14.07246 17.97476
## Oct 2024       16.02361 14.68054 17.36668 13.96956 18.07766
## Nov 2024       16.02361 14.61647 17.43075 13.87158 18.17564
## Dec 2024       16.02361 14.55520 17.49203 13.77786 18.26936
## Jan 2025       16.02361 14.49638 17.55085 13.68791 18.35931
## Feb 2025       16.02361 14.43974 17.60748 13.60129 18.44593
plot(carsls_ets_fc)

Summary of this forecasting technique

  • How good is the accuracy? : Considering the MAPE of naive was 3.41, accuracy of this simple smoothing method is better than Naive. Also, Root mean squared error is 0.5, which can be considered really low.

  • Predicted value : By checking the table, it predicts the value of the time series to be 16.02 for the whole forecast period. The plot is similar to Naive method since it also shows the same value for the given period. It is because this model forecasts only based on the “level” of the data.

8. Holt-Winters

#forecast for next 12 months
carsls_hw = hw(carsls_sub,h=12)
summary(carsls_hw)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
## hw(y = carsls_sub, h = 12)
## 
##   Smoothing parameters:
##     alpha = 2e-04 
##     beta  = 2e-04 
##     gamma = 0.9997 
## 
##   Initial states:
##     l = 13.424 
##     b = 0.1305 
##     s = -0.9951 -0.2147 0.5375 -0.7997 -0.2747 -0.8722
##            0.2528 -1.0152 0.5749 0.2429 0.6483 1.9153
## 
##   sigma:  1.0571
## 
##       AIC      AICc       BIC 
##  96.75263 173.25263 118.14027 
## 
## Error measures:
##                       ME     RMSE       MAE        MPE     MAPE      MASE
## Training set -0.01760396 0.655559 0.5118556 -0.1586194 3.307403 0.3215895
##                   ACF1
## Training set 0.1938631
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2024       17.23629 15.88162 18.59096 15.16450 19.30809
## Apr 2024       18.01447 16.65980 19.36914 15.94268 20.08626
## May 2024       17.51639 16.16172 18.87107 15.44460 19.58819
## Jun 2024       17.99612 16.64145 19.35080 15.92433 20.06792
## Jul 2024       17.86972 16.51505 19.22440 15.79792 19.94152
## Aug 2024       17.47630 16.12162 18.83097 15.40450 19.54809
## Sep 2024       17.75974 16.40506 19.11441 15.68794 19.83154
## Oct 2024       17.36545 16.01077 18.72013 15.29365 19.43725
## Nov 2024       17.51113 16.15645 18.86581 15.43932 19.58293
## Dec 2024       17.95157 16.59689 19.30626 15.87976 20.02338
## Jan 2025       17.07859 15.72390 18.43328 15.00678 19.15040
## Feb 2025       17.75657 16.40188 19.11126 15.68475 19.82839
#Residual Analysis
carsls_hw_res = residuals(carsls_hw)
#Plot
plot(carsls_hw_res)
abline(h=0,col="red")

#Histogram
hist(carsls_hw_res)

#Plot of fitted vs. residuals
carsls_hw_fitted = fitted(carsls_hw)
ggplot(data = data.frame(fitted = carsls_hw_fitted, residuals = carsls_hw_res), 
       aes(x = fitted, y = residuals)) + geom_point() + geom_hline(yintercept = 0, color="red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

#Plot of actual vs. residuals
plot(as.numeric(carsls_sub),carsls_hw_res, xlab="actual",ylab="residuals")
abline(h=0,col="red")

#Acf
plot(Acf(carsls_hw_res))

#Measures of accuracy
accuracy(carsls_hw)
##                       ME     RMSE       MAE        MPE     MAPE      MASE
## Training set -0.01760396 0.655559 0.5118556 -0.1586194 3.307403 0.3215895
##                   ACF1
## Training set 0.1938631
#Forecast for next year (table, plot)
carsls_hw
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2024       17.23629 15.88162 18.59096 15.16450 19.30809
## Apr 2024       18.01447 16.65980 19.36914 15.94268 20.08626
## May 2024       17.51639 16.16172 18.87107 15.44460 19.58819
## Jun 2024       17.99612 16.64145 19.35080 15.92433 20.06792
## Jul 2024       17.86972 16.51505 19.22440 15.79792 19.94152
## Aug 2024       17.47630 16.12162 18.83097 15.40450 19.54809
## Sep 2024       17.75974 16.40506 19.11441 15.68794 19.83154
## Oct 2024       17.36545 16.01077 18.72013 15.29365 19.43725
## Nov 2024       17.51113 16.15645 18.86581 15.43932 19.58293
## Dec 2024       17.95157 16.59689 19.30626 15.87976 20.02338
## Jan 2025       17.07859 15.72390 18.43328 15.00678 19.15040
## Feb 2025       17.75657 16.40188 19.11126 15.68475 19.82839
plot(carsls_hw)

Summary of this forecasting technique

  • How good is the accuracy? : MAPE is smaller than Naive, but larger than simple smoothing. RMSE is also larger than simple smoothing.

  • Predicted time series value : can be checked in the above table. Unlike Naive or simple smoothing, it has various different values for the given period.

9. ARIMA or Box-Jenkins

# Is Time Series data Stationary? How did you verify?
# How many differences are needed to make it stationary?
# Is Seasonality component needed?

tsdisplay(carsls_sub)

ndiffs(carsls_sub)
## [1] 1
nsdiffs(carsls_sub)
## [1] 0
Acf(carsls_sub)

#   Plot the Time Series chart of the differenced series. 
#   Plot the ACF and PACF plots of the differenced series. 
# Based on the ACF and PACF, which are the possible ARIMA models? 

carslsdiff = diff(carsls_sub,1)
tsdisplay(carslsdiff)

arima1 = Arima(carsls_sub, order = c(1, 1, 1))
arima2 = Arima(carsls_sub, order = c(2, 1, 1))
arima3 =Arima(carsls_sub, order = c(3, 1, 1))

a=c(arima1$aic,arima1$bic,arima1$sigma2)
b=c(arima2$aic,arima2$bic,arima2$sigma2)
c=c(arima3$aic,arima3$bic,arima3$sigma2)

arimas = rbind(a,b,c)
rownames(arimas) = c("(1,1,1)","(2,1,1)","(3,1,1)")
colnames(arimas) = c("aic","bic","sigma")

arimas
##              aic      bic     sigma
## (1,1,1) 49.03047 52.68710 0.3518404
## (2,1,1) 43.92843 48.80394 0.2425013
## (3,1,1) 46.74860 52.84297 0.2907997
auto.arima(carsls_sub,trace=TRUE, stepwise = FALSE)
## 
##  ARIMA(0,1,0)                               : 51.57352
##  ARIMA(0,1,0)            with drift         : 53.77824
##  ARIMA(0,1,1)                               : 48.24721
##  ARIMA(0,1,1)            with drift         : 49.88389
##  ARIMA(0,1,2)                               : 48.51251
##  ARIMA(0,1,2)            with drift         : 50.95166
##  ARIMA(0,1,3)                               : 50.25856
##  ARIMA(0,1,3)            with drift         : 53.11982
##  ARIMA(0,1,4)                               : Inf
##  ARIMA(0,1,4)            with drift         : Inf
##  ARIMA(0,1,5)                               : Inf
##  ARIMA(0,1,5)            with drift         : Inf
##  ARIMA(1,1,0)                               : 48.62806
##  ARIMA(1,1,0)            with drift         : 50.76726
##  ARIMA(1,1,1)                               : 50.17333
##  ARIMA(1,1,1)            with drift         : 52.19397
##  ARIMA(1,1,2)                               : 51.28083
##  ARIMA(1,1,2)            with drift         : 54.071
##  ARIMA(1,1,3)                               : Inf
##  ARIMA(1,1,3)            with drift         : Inf
##  ARIMA(1,1,4)                               : Inf
##  ARIMA(1,1,4)            with drift         : Inf
##  ARIMA(2,1,0)                               : 48.20508
##  ARIMA(2,1,0)            with drift         : 49.80281
##  ARIMA(2,1,1)                               : Inf
##  ARIMA(2,1,1)            with drift         : Inf
##  ARIMA(2,1,2)                               : 49.31717
##  ARIMA(2,1,2)            with drift         : 52.19999
##  ARIMA(2,1,3)                               : Inf
##  ARIMA(2,1,3)            with drift         : Inf
##  ARIMA(3,1,0)                               : 46.77066
##  ARIMA(3,1,0)            with drift         : 49.42144
##  ARIMA(3,1,1)                               : 49.90649
##  ARIMA(3,1,1)            with drift         : 52.80319
##  ARIMA(3,1,2)                               : Inf
##  ARIMA(3,1,2)            with drift         : Inf
##  ARIMA(4,1,0)                               : 49.90939
##  ARIMA(4,1,0)            with drift         : 52.80723
##  ARIMA(4,1,1)                               : 53.4198
##  ARIMA(4,1,1)            with drift         : Inf
##  ARIMA(5,1,0)                               : 53.41251
##  ARIMA(5,1,0)            with drift         : 56.69595
## 
## 
## 
##  Best model: ARIMA(3,1,0)
## Series: carsls_sub 
## ARIMA(3,1,0) 
## 
## Coefficients:
##           ar1      ar2     ar3
##       -0.4560  -0.1214  0.4010
## s.e.   0.1811   0.1998  0.1803
## 
## sigma^2 = 0.278:  log likelihood = -18.39
## AIC=44.77   AICc=46.77   BIC=49.65
#Residual Analysis
carsls_arima = auto.arima(carsls_sub,trace=TRUE, stepwise = FALSE)
## 
##  ARIMA(0,1,0)                               : 51.57352
##  ARIMA(0,1,0)            with drift         : 53.77824
##  ARIMA(0,1,1)                               : 48.24721
##  ARIMA(0,1,1)            with drift         : 49.88389
##  ARIMA(0,1,2)                               : 48.51251
##  ARIMA(0,1,2)            with drift         : 50.95166
##  ARIMA(0,1,3)                               : 50.25856
##  ARIMA(0,1,3)            with drift         : 53.11982
##  ARIMA(0,1,4)                               : Inf
##  ARIMA(0,1,4)            with drift         : Inf
##  ARIMA(0,1,5)                               : Inf
##  ARIMA(0,1,5)            with drift         : Inf
##  ARIMA(1,1,0)                               : 48.62806
##  ARIMA(1,1,0)            with drift         : 50.76726
##  ARIMA(1,1,1)                               : 50.17333
##  ARIMA(1,1,1)            with drift         : 52.19397
##  ARIMA(1,1,2)                               : 51.28083
##  ARIMA(1,1,2)            with drift         : 54.071
##  ARIMA(1,1,3)                               : Inf
##  ARIMA(1,1,3)            with drift         : Inf
##  ARIMA(1,1,4)                               : Inf
##  ARIMA(1,1,4)            with drift         : Inf
##  ARIMA(2,1,0)                               : 48.20508
##  ARIMA(2,1,0)            with drift         : 49.80281
##  ARIMA(2,1,1)                               : Inf
##  ARIMA(2,1,1)            with drift         : Inf
##  ARIMA(2,1,2)                               : 49.31717
##  ARIMA(2,1,2)            with drift         : 52.19999
##  ARIMA(2,1,3)                               : Inf
##  ARIMA(2,1,3)            with drift         : Inf
##  ARIMA(3,1,0)                               : 46.77066
##  ARIMA(3,1,0)            with drift         : 49.42144
##  ARIMA(3,1,1)                               : 49.90649
##  ARIMA(3,1,1)            with drift         : 52.80319
##  ARIMA(3,1,2)                               : Inf
##  ARIMA(3,1,2)            with drift         : Inf
##  ARIMA(4,1,0)                               : 49.90939
##  ARIMA(4,1,0)            with drift         : 52.80723
##  ARIMA(4,1,1)                               : 53.4198
##  ARIMA(4,1,1)            with drift         : Inf
##  ARIMA(5,1,0)                               : 53.41251
##  ARIMA(5,1,0)            with drift         : 56.69595
## 
## 
## 
##  Best model: ARIMA(3,1,0)
carsls_arima_res = carsls_arima$residuals

#plot
plot(carsls_arima_res)
abline(h=0,col="red")

#Histogram
hist(carsls_arima_res)

#Plot of fitted vs. residuals
carsls_arima_fitted = fitted(carsls_arima)
ggplot(data = data.frame(fitted = carsls_arima_fitted, residuals = carsls_arima_res), 
       aes(x = fitted, y = residuals)) + geom_point() + geom_hline(yintercept = 0, color="red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

#Plot of actual vs. residuals
plot(as.numeric(carsls_sub),carsls_arima_res, xlab="actual",ylab="residuals")
abline(h=0,col="red")

#Acf
plot(Acf(carsls_arima_res))

#Measures of accuracy
accuracy(carsls_arima)
##                      ME      RMSE      MAE       MPE     MAPE      MASE
## Training set 0.05453538 0.4850027 0.369875 0.2882758 2.481099 0.2323857
##                    ACF1
## Training set 0.01221806
#Forecast for next 1 & 2 year
carsls_arima_fc_1Y = forecast(carsls_arima,h=12)
carsls_arima_fc_2Y = forecast(carsls_arima,h=24)

#Table and plot
# 1Y
carsls_arima_fc_1Y
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2024       16.16459 15.48888 16.84029 15.13119 17.19799
## Apr 2024       15.74432 14.97511 16.51353 14.56792 16.92072
## May 2024       16.21102 15.33169 17.09035 14.86619 17.55584
## Jun 2024       16.03861 14.90985 17.16737 14.31233 17.76489
## Jul 2024       15.89209 14.67744 17.10673 14.03444 17.74973
## Aug 2024       16.16695 14.83107 17.50283 14.12390 18.21000
## Sep 2024       15.99026 14.50401 17.47652 13.71723 18.26329
## Oct 2024       15.97873 14.41346 17.54400 13.58486 18.37260
## Nov 2024       16.11564 14.44085 17.79042 13.55428 18.67700
## Dec 2024       15.98376 14.20283 17.76470 13.26006 18.70747
## Jan 2025       16.02266 14.16621 17.87911 13.18346 18.86186
## Feb 2025       16.07582 14.12419 18.02745 13.09106 19.06058
plot(carsls_arima_fc_1Y)

# 2Y
carsls_arima_fc_2Y
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 2024       16.16459 15.48888 16.84029 15.13119 17.19799
## Apr 2024       15.74432 14.97511 16.51353 14.56792 16.92072
## May 2024       16.21102 15.33169 17.09035 14.86619 17.55584
## Jun 2024       16.03861 14.90985 17.16737 14.31233 17.76489
## Jul 2024       15.89209 14.67744 17.10673 14.03444 17.74973
## Aug 2024       16.16695 14.83107 17.50283 14.12390 18.21000
## Sep 2024       15.99026 14.50401 17.47652 13.71723 18.26329
## Oct 2024       15.97873 14.41346 17.54400 13.58486 18.37260
## Nov 2024       16.11564 14.44085 17.79042 13.55428 18.67700
## Dec 2024       15.98376 14.20283 17.76470 13.26006 18.70747
## Jan 2025       16.02266 14.16621 17.87911 13.18346 18.86186
## Feb 2025       16.07582 14.12419 18.02745 13.09106 19.06058
## Mar 2025       15.99398 13.95788 18.03009 12.88003 19.10794
## Apr 2025       16.04045 13.93186 18.14903 12.81564 19.26525
## May 2025       16.05050 13.85911 18.24190 12.69906 19.40195
## Jun 2025       16.00746 13.74321 18.27172 12.54458 19.47035
## Jul 2025       16.04450 13.71125 18.37775 12.47610 19.61291
## Aug 2025       16.03687 13.63043 18.44331 12.35653 19.71720
## Sep 2025       16.01860 13.54621 18.49099 12.23740 19.79979
## Oct 2025       16.04270 13.50515 18.58026 12.16185 19.92356
## Nov 2025       16.03087 13.42733 18.63441 12.04909 20.01264
## Dec 2025       16.02601 13.36119 18.69083 11.95052 20.10151
## Jan 2026       16.03933 13.31318 18.76548 11.87004 20.20862
## Feb 2026       16.02910 13.24234 18.81587 11.76711 20.29109
plot(carsls_arima_fc_2Y)

10. Accuracy Summary

d=accuracy(carsls_naive)
e=accuracy(MA6_fc)
f=accuracy(carsls_ets_fc)
g=accuracy(carsls_hw)
h=accuracy(carsls_arima)

accuracymeasure = rbind(d,e,f,g,h)
rownames(accuracymeasure) = c("naive","MA6","Simple smoothing","HW","ARIMA")

accuracymeasure
##                            ME       RMSE        MAE         MPE      MAPE
## naive             0.053000000 0.64989381 0.51412000  0.24604323 3.4113871
## MA6               0.002340272 0.06812097 0.05171541  0.02845714 0.3385603
## Simple smoothing  0.100879843 0.56621504 0.43485016  0.55536757 2.8898006
## HW               -0.017603957 0.65555899 0.51185559 -0.15861944 3.3074032
## ARIMA             0.054535382 0.48500267 0.36987497  0.28827581 2.4810986
##                        MASE        ACF1
## naive            0.32301216 -0.43569823
## MA6              0.02762252  0.08423552
## Simple smoothing 0.27320838 -0.12274966
## HW               0.32158947  0.19386313
## ARIMA            0.23238566  0.01221806

About the methods

Best and worst method for each accuracy measure.

Overall, Moving Average is the best model that outstands all other models in accuracy measures.

10. Conclusion

MA6_fc_2y = forecast(MA6,h=24)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
MA6_fc_2y
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Dec 2023       15.93874 15.837018 16.04046 15.783171 16.09431
## Jan 2024       15.89648 15.669163 16.12379 15.548829 16.24413
## Feb 2024       15.85422 15.474076 16.23436 15.272840 16.43560
## Mar 2024       15.81196 15.255828 16.36809 14.961429 16.66249
## Apr 2024       15.76970 15.017156 16.52225 14.618782 16.92062
## May 2024       15.72744 14.760039 16.69484 14.247927 17.20696
## Jun 2024       15.68518 14.485990 16.88437 13.851176 17.51919
## Jul 2024       15.64292 14.196210 17.08964 13.430367 17.85548
## Aug 2024       15.60066 13.891683 17.30964 12.987004 18.21432
## Sep 2024       15.55840 13.573232 17.54358 12.522346 18.59446
## Oct 2024       15.51615 13.241558 17.79073 12.037465 18.99483
## Nov 2024       15.47389 12.897265 18.05051 11.533285 19.41449
## Dec 2024       15.43163 12.540881 18.32237 11.010613 19.85264
## Jan 2025       15.38937 12.172870 18.60587 10.470158 20.30858
## Feb 2025       15.34711 11.793642 18.90058  9.912550 20.78167
## Mar 2025       15.30485 11.403564 19.20613  9.338348 21.27135
## Apr 2025       15.26259 11.002963 19.52222  8.748053 21.77713
## May 2025       15.22033 10.592135 19.84853  8.142116 22.29855
## Jun 2025       15.17807 10.171343 20.18480  7.520941 22.83520
## Jul 2025       15.13581  9.740827 20.53080  6.884894 23.38673
## Aug 2025       15.09355  9.300801 20.88630  6.234304 23.95280
## Sep 2025       15.05129  8.851461 21.25113  5.569468 24.53312
## Oct 2025       15.00903  8.392981 21.62509  4.890654 25.12741
## Nov 2025       14.96677  7.925518 22.00803  4.198102 25.73545
plot(MA6_fc_2y)